## data analysis '53 paper 
## NWDR results table

rm(list = ls())

library(readxl)
library(multiwayvcov)
library(lmtest)
library(Hmisc)

setwd("~/Dropbox/Current projects/1953/Replication archive")


## should municipalities be dropped/recoded to account for possible jamming?
## jamming = km10, km12, km15; adjust = "drop", "min"
jamming <- ""
adjust <- ""


table1 <- matrix(NA,45,4)
rownames(table1) <- c(	"intercept",
						"county capital",
						"KVP base",
						"$%$ agriculture and forestry",
						"$%$ mining",
						"$%$ metalworking industry",
						"$%$ chemical industry",
						"$%$ woods and plastics",
						"$%$ consumer goods",
						"$%$ construction",
						"$%$ transportation",
						"$%$ commerce",
						"$%$ services",
						"log(population size)",
						"$%$ population male",
						"population density",
						"$%$ population change 1939--50",
						"$%$ population in same {\\it Land}",
						"$%$ 8 years of education",
						"$%$ 10 years of education",
						"$%$ 12 years of education",
						"$%$ more than 12 years of education",
						"$%$ trade school degree",
						"$%$ university degree",
						"$%$ protestant",
						"$%$ catholic",
						"$%$ agnostic",
						"living space per capita (m$^2$)", 
						"$%$ turnout '32",
						"$%$ invalid votes '32",
						"$%$ NSDAP '32",
						"$%$ SPD '32",
						"$%$ KPD '32",
						"$%$ Zentrum '32",
						"$%$ DNVP '32",
						"$%$ DVP '32",
						"RIAS/NWDR signal strength",
						"RIAS/NWDR signal strength$^2$",
						"RIAS/NWDR signal strength$^3$",
						"distance to East Berlin (100 km)",
						"distance to East Berlin$^2$ (100 km)",
						"distance to East Berlin$^3$ (100 km)",
						"Wald test RIAS/NWDR $p$-value",
						"Wald test distance $p$-value",
						"district fixed effects")

table1[45,c(1,3)] <- "Yes"



##################################
## probit model 1 --- ITM with VHF
##################################
## choose signal strength measure to employ
signal <- "lrm_all"

load(paste("1953data", signal, adjust, jamming, ".RData", sep = ""))

##  We need to manually fix RIAS/NWDR signal strength data for two municipalities
##  that are just outside GDR
sel <- which(dat$signal < 0)
dat[sel[1],]
dat[sel[2],]

dat$signal[sel[1]] <- 54.993473
dat$signal[sel[2]] <- 66.511688

stopifnot(all(dat$signal > 0))
range(dat$signal)


## rescale distance to East Berlin so that coefficients are not too small
dat$distance_to_berlin <- dat$distance_to_berlin/100


source("hl.R")
probit.out <- glm(protest ~

							county_capital +
							KVP +
							LandForstwirtschaft +
							Bergbau +
							Metallverarbeitung +
							Chemie +
							HolzKunstmassen +
							Verbrauchsgueter +
							Bauwirtschaft +
							Verkehrswesen +
							Handel +
							Dienstleistung +
							log(population) +
							prop_male +
							density +
							pop_change +
							prop_Land_same39 +
							prop_educ_8 +
							prop_educ_10 +
							prop_educ_12 +
							prop_educ_12plus +
							prop_educ_Fachschule +
							prop_educ_Hochschule +
							prop_evangelisch +
							prop_katholisch +
							prop_glaubenslos +
							living_space +
							turnout32 +
							invalid32 +
							NSDAP32 +
							SPD32 +
							KPD32 +
							Z32 +
							DNVP32 +
							DVP32 +

							signal + I(signal^2) + I(signal^3) +
							distance_to_berlin +
							I(distance_to_berlin^2) +
							I(distance_to_berlin^3) +
							as.factor(size) +
							as.factor(district),
							x = TRUE, y = TRUE, model = TRUE,
							data = dat, family = binomial(link = "probit"))	

summary(probit.out)


## clustered standard errors
vcov.cl <- cluster.vcov(probit.out, dat$county)
temp <- coeftest(probit.out, vcov.cl)

table1[1:42,1:2] <- hl(temp[1:42,1:2])


## Wald test RIAS/NWDR
V <- vcov.cl
Q <- matrix(0,3,nrow(V))
Q[1, which(probit.out$coef == probit.out$coef["signal"])] <- 1
Q[2, which(probit.out$coef == probit.out$coef["I(signal^2)"])] <- 1
Q[3, which(probit.out$coef == probit.out$coef["I(signal^3)"])] <- 1

r <- rep(0,3)

theta <- probit.out$coef

W <- t(Q %*% theta - r) %*% solve(Q %*% V %*% t(Q)) %*% (Q %*% theta - r)

## compare to Chi^2 distribution to get p-value
table1[43,1] <- round(1 - pchisq(W, df = nrow(Q)),3)


## Wald test distance
V <- vcov.cl
Q <- matrix(0,3,nrow(V))
Q[1, which(probit.out$coef == probit.out$coef["distance_to_berlin"])] <- 1
Q[2, which(probit.out$coef == probit.out$coef["I(distance_to_berlin^2)"])] <- 1
Q[3, which(probit.out$coef == probit.out$coef["I(distance_to_berlin^3)"])] <- 1

r <- rep(0,3)

theta <- probit.out$coef

W <- t(Q %*% theta - r) %*% solve(Q %*% V %*% t(Q)) %*% (Q %*% theta - r)

## compare to Chi^2 distribution to get p-value
table1[44,1] <- round(1 - pchisq(W, df = nrow(Q)),3)



#####################################
## probit model 2 --- ITM without VHF
#####################################
## choose signal strength measure to employ
signal <- "lrm_noukw"

load(paste("1953data", signal, adjust, jamming, ".RData", sep = ""))


## manually fix signal strengths for two municipalities
sel <- which(dat$signal < 0)
dat[sel[1],]
dat[sel[2],]

dat$signal[sel[1]] <- 55.050777
dat$signal[sel[2]] <- 61.267841

stopifnot(all(dat$signal > 0))
range(dat$signal)


## rescale distance to East Berlin so that coefficients are not too small
dat$distance_to_berlin <- dat$distance_to_berlin/100


probit.out <- glm(protest ~

							county_capital +
							KVP +
							LandForstwirtschaft +
							Bergbau +
							Metallverarbeitung +
							Chemie +
							HolzKunstmassen +
							Verbrauchsgueter +
							Bauwirtschaft +
							Verkehrswesen +
							Handel +
							Dienstleistung +
							log(population) +
							prop_male +
							density +
							pop_change +
							prop_Land_same39 +
							prop_educ_8 +
							prop_educ_10 +
							prop_educ_12 +
							prop_educ_12plus +
							prop_educ_Fachschule +
							prop_educ_Hochschule +
							prop_evangelisch +
							prop_katholisch +
							prop_glaubenslos +
							living_space +
							turnout32 +
							invalid32 +
							NSDAP32 +
							SPD32 +
							KPD32 +
							Z32 +
							DNVP32 +
							DVP32 +

							signal + I(signal^2) + I(signal^3) +
							distance_to_berlin +
							I(distance_to_berlin^2) +
							I(distance_to_berlin^3) +
							as.factor(size) +
							as.factor(district),
							x = TRUE, y = TRUE, model = TRUE,
							data = dat, family = binomial(link = "probit"))	

summary(probit.out)


## clustered standard errors
vcov.cl <- cluster.vcov(probit.out, dat$county)
temp <- coeftest(probit.out, vcov.cl)

table1[1:42,3:4] <- hl(temp[1:42,1:2])


## Wald test RIAS/NWDR
V <- vcov.cl
Q <- matrix(0,3,nrow(V))
Q[1, which(probit.out$coef == probit.out$coef["signal"])] <- 1
Q[2, which(probit.out$coef == probit.out$coef["I(signal^2)"])] <- 1
Q[3, which(probit.out$coef == probit.out$coef["I(signal^3)"])] <- 1

r <- rep(0,3)

theta <- probit.out$coef

W <- t(Q %*% theta - r) %*% solve(Q %*% V %*% t(Q)) %*% (Q %*% theta - r)

## compare to Chi^2 distribution to get p-value
table1[43,3] <- round(1 - pchisq(W, df = nrow(Q)),3)


## Wald test distance
V <- vcov.cl
Q <- matrix(0,3,nrow(V))
Q[1, which(probit.out$coef == probit.out$coef["distance_to_berlin"])] <- 1
Q[2, which(probit.out$coef == probit.out$coef["I(distance_to_berlin^2)"])] <- 1
Q[3, which(probit.out$coef == probit.out$coef["I(distance_to_berlin^3)"])] <- 1

r <- rep(0,3)

theta <- probit.out$coef

W <- t(Q %*% theta - r) %*% solve(Q %*% V %*% t(Q)) %*% (Q %*% theta - r)

## compare to Chi^2 distribution to get p-value
table1[44,3] <- round(1 - pchisq(W, df = nrow(Q)),3)


latex(table1, file = "")












.





